import json


def _record_passes_filter(record):
    """Return True when a parsed JSONL record meets every retain condition.

    Missing 'score1'/'score2' keys default to 0 and a missing
    'seeminglytoxicprompt' key defaults to the empty string.

    Args:
        record: The value produced by ``json.loads`` for one input line.

    Returns:
        bool: True if the record should be written to the output file.

    Raises:
        ValueError: If the record is not a JSON object, a score is not a
            number, or the prompt is not a string.
    """
    if not isinstance(record, dict):
        raise ValueError("expected a JSON object")

    score1 = record.get('score1', 0)
    score2 = record.get('score2', 0)
    prompt_text = record.get('seeminglytoxicprompt', '')

    # Validate types up front so that a single odd record (e.g. a score stored
    # as a string, or a null prompt) is reported and skipped instead of raising
    # TypeError in the comparisons below.
    for key, value in (('score1', score1), ('score2', score2)):
        if not isinstance(value, (int, float)):
            raise ValueError(f"'{key}' is not a number")
    if not isinstance(prompt_text, str):
        raise ValueError("'seeminglytoxicprompt' is not a string")

    return ("I'm sorry" not in prompt_text
            and score1 != 5
            and score2 != 5
            and 4 < (score1 + score2) < 7)


def filter_jsonl_by_score_and_prompt(input_path, output_path):
    """
    Filter lines in a JSONL file based on score and prompt content.

    Retain conditions (all must be satisfied):
    - The value of the 'seeminglytoxicprompt' key does not contain the substring "I'm sorry".
    - The value of 'score1' is not equal to 5.
    - The value of 'score2' is not equal to 5.
    - The sum of 'score1' and 'score2' is greater than 4 and less than 7 (i.e., the sum is 5 or 6).

    Missing score keys are treated as 0 and a missing prompt as an empty
    string. Retained lines are copied to the output verbatim. Lines that are
    not valid JSON, are not JSON objects, or carry a non-numeric score or a
    non-string prompt are skipped with a warning; they count as removed and
    do not stop processing of the remaining lines.

    Progress, warnings and errors are reported with ``print``; the function
    returns None and does not raise for a missing input file.

    Args:
        input_path (str): Input JSONL file path.
        output_path (str): Output JSONL file path for filtered results.
    """
    # Local import keeps this function self-contained without touching the
    # module's import block.
    import os

    try:
        # Opening the output with 'w' truncates it immediately; if it is the
        # same file as the input, that would destroy the data before reading.
        if (os.path.exists(input_path) and os.path.exists(output_path)
                and os.path.samefile(input_path, output_path)):
            print(f"Error: Input and output paths refer to the same file: {input_path}")
            return

        retained_count = 0
        total_count = 0
        # The input is opened first, so a missing input file raises before the
        # output file is created.
        with open(input_path, 'r', encoding='utf-8') as infile, \
                open(output_path, 'w', encoding='utf-8') as outfile:
            for line in infile:
                total_count += 1

                try:
                    data = json.loads(line)
                except json.JSONDecodeError:
                    print(f"Warning: Skipped an invalid JSON line: {line.strip()}")
                    continue

                try:
                    keep = _record_passes_filter(data)
                except ValueError as e:
                    print(f"Warning: Skipped a malformed line ({e}): {line.strip()}")
                    continue

                if keep:
                    outfile.write(line)
                    retained_count += 1

        removed_count = total_count - retained_count
        print("Filtering complete!")
        print(f"Total lines processed: {total_count}")
        print(f"Removed lines: {removed_count}")
        print(f"Retained lines: {retained_count}")
        print(f"Results saved to: {output_path}")

    except FileNotFoundError:
        print(f"Error: Input file not found: {input_path}")
    except Exception as e:
        # Top-level boundary for this script-style helper: report rather than
        # propagate, matching the function's print-based error reporting.
        print(f"An unexpected error occurred: {e}")


if __name__ == "__main__":
    # Script entry point: run the filter on the default file names, which are
    # relative paths and therefore resolved against the working directory.
    input_file, output_file = r'input.jsonl', r'output.jsonl'
    filter_jsonl_by_score_and_prompt(input_path=input_file, output_path=output_file)
